﻿## ---------------------------
## Script name: 1_generate_regression_data.R
## Author:         Leo Y. Yang
## Date Created:    2022-06-03
## Email:     LeoYang@ucsd.edu
##
## ---------------------------
## Purpose of script:
##   Generate the manipulated data 
## to run analyses in Stata
##
## ---------------------------

# Load packages -----------------------------------------------------------
library(tidyverse)
library(haven)
library(fst)
library(foreach)

# Setup Working Directory -------------------------------------------------
# Every relative "data/..." path below is resolved against this directory.
setwd(dir = "/Your/Working/Directory/PSRM_code_data")

# Load Dyad Province Data -------------------------------------------------
# Input table that all connection dummies below are derived from.
dyad.province.data <- read_fst(path = "data/dyad_province_data.fst")


# Map cadres' Chinese names, Pinyin names, and English names --------------
# Lookup table of the 24 patrons, one row per cadre. The three name vectors are
# positionally aligned: entry k of each vector describes the same person.
#   Chinese_name - name in Chinese characters (used below as a search pattern)
#   Pinyin_name  - concatenated Pinyin (used below as a column-name suffix)
#   Eng_name     - romanised name with a space between family and given name
#   patron_code  - running integer id, 1..24
Chinese.Pinyin.name <- data.frame(
  Chinese_name = c(
    "李瑞环", "温家宝", "贾庆林", "王岐山", "刘云山", "李长春",
    "张德江", "周永康", "江泽民", "李岚清", "胡锦涛", "黄菊",
    "尉健行", "俞正声", "吴邦国", "李克强", "张高丽", "曾庆红",
    "吴官正", "罗干", "朱镕基", "贺国强", "李鹏", "习近平"
  ),
  Pinyin_name = c(
    "Liruihuan", "Wenjiabao", "Jiaqinglin", "Wangqishan", "Liuyunshan", "Lichangchun",
    "Zhangdejiang", "Zhouyongkang", "Jiangzemin", "Lilanqing", "Hujintao", "Huangju",
    "Weijianxing", "Yuzhengsheng", "Wubangguo", "Likeqiang", "Zhanggaoli", "Zengqinghong",
    "Wuguanzheng", "Luogan", "Zhurongji", "Heguoqiang", "Lipeng", "Xijinping"
  ),
  Eng_name = c(
    "Li Ruihuan", "Wen Jiabao", "Jia Qinglin", "Wang Qishan", "Liu Yunshan", "Li Changchun",
    "Zhang Dejiang", "Zhou Yongkang", "Jiang Zemin", "Li Lanqing", "Hu Jintao", "Huang Ju",
    "Wei Jianxing", "Yu Zhengsheng", "Wu Bangguo", "Li Keqiang", "Zhang Gaoli", "Zeng Qinghong",
    "Wu Guanzheng", "Luo Gan", "Zhu Rongji", "He Guoqiang", "Li Peng", "Xi Jinping"
  ),
  # seq(1:24) only produced 1..24 because seq() falls back to seq_along() for a
  # vector argument; seq_len() states the intent directly.
  patron_code = seq_len(24L)
)


# Generate Connection defined by workplace connection ---------------------
# For every patron, flag (0/1) whether the patron's Chinese name occurs in the
# colleague / supervisor text columns of the event province (e*) and the news
# province (n*), for party secretaries and for governors (*gn*). The per-patron
# tables are bound column-wise and then collapsed into row-level summaries.
#
# Compared with the earlier version of this block:
#   * seq_along() replaces 1:length(), which iterates over c(1, 0) on empty input;
#   * .data[[name]] / all_of() replace the soft-deprecated UQ(rlang::sym(name));
#   * the patron's names are looked up once per iteration instead of per column.
# Output columns, their order and their types are unchanged.
onehot.workplace.connection <- foreach(i = seq_along(Chinese.Pinyin.name$Pinyin_name), .combine = cbind) %do% {
  # Chinese name = search pattern, Pinyin name = suffix of every new column
  patron.chinese <- as.character(Chinese.Pinyin.name$Chinese_name[i])
  patron.pinyin <- as.character(Chinese.Pinyin.name$Pinyin_name[i])
  patron.col <- function(prefix) paste0(prefix, patron.pinyin)
  patron.mentioned <- function(text) str_detect(text, patron.chinese)

  dyad.province.data %>%
    select(ecolleague, ncolleague, esupervisor, nsupervisor,
           egncolleague, ngncolleague, egnsupervisor, ngnsupervisor) %>%
    mutate(
      # "work" = colleague OR supervisor tie
      !!patron.col("ework") := ifelse(patron.mentioned(ecolleague) | patron.mentioned(esupervisor), 1, 0),
      !!patron.col("nwork") := ifelse(patron.mentioned(ncolleague) | patron.mentioned(nsupervisor), 1, 0),
      !!patron.col("ecolleague") := ifelse(patron.mentioned(ecolleague), 1, 0),
      !!patron.col("ncolleague") := ifelse(patron.mentioned(ncolleague), 1, 0),
      !!patron.col("esupervisor") := ifelse(patron.mentioned(esupervisor), 1, 0),
      !!patron.col("nsupervisor") := ifelse(patron.mentioned(nsupervisor), 1, 0),
      # Generate the faction of governors
      !!patron.col("egnwork") := ifelse(patron.mentioned(egncolleague) | patron.mentioned(egnsupervisor), 1, 0),
      !!patron.col("ngnwork") := ifelse(patron.mentioned(ngncolleague) | patron.mentioned(ngnsupervisor), 1, 0),
      !!patron.col("egncolleague") := ifelse(patron.mentioned(egncolleague), 1, 0),
      !!patron.col("ngncolleague") := ifelse(patron.mentioned(ngncolleague), 1, 0),
      !!patron.col("egnsupervisor") := ifelse(patron.mentioned(egnsupervisor), 1, 0),
      !!patron.col("ngnsupervisor") := ifelse(patron.mentioned(ngnsupervisor), 1, 0)
    ) %>%
    mutate(
      # Whether PS and GN are from the same faction
      !!patron.col("_pgnworkfaction") := .data[[patron.col("nwork")]] == 1 & .data[[patron.col("ngnwork")]] == 1,
      !!patron.col("_pgeworkfaction") := .data[[patron.col("ework")]] == 1 & .data[[patron.col("egnwork")]] == 1,
      !!patron.col("_pgncolleaguefaction") := .data[[patron.col("ncolleague")]] == 1 & .data[[patron.col("ngncolleague")]] == 1,
      !!patron.col("_pgecolleaguefaction") := .data[[patron.col("ecolleague")]] == 1 & .data[[patron.col("egncolleague")]] == 1,
      !!patron.col("_pgnsupervisorfaction") := .data[[patron.col("nsupervisor")]] == 1 & .data[[patron.col("ngnsupervisor")]] == 1,
      !!patron.col("_pgesupervisorfaction") := .data[[patron.col("esupervisor")]] == 1 & .data[[patron.col("egnsupervisor")]] == 1,
      # Whether PS of event province and PS of news province are from the same faction
      !!patron.col("_sameworkfaction") := .data[[patron.col("ework")]] == 1 & .data[[patron.col("nwork")]] == 1,
      !!patron.col("_samecolleaguefaction") := .data[[patron.col("ecolleague")]] == 1 & .data[[patron.col("ncolleague")]] == 1,
      !!patron.col("_samesupervisorfaction") := .data[[patron.col("esupervisor")]] == 1 & .data[[patron.col("nsupervisor")]] == 1
    ) %>%
    # Governor dummies are only inputs to the flags above and are not kept
    select(all_of(patron.col(c(
      "ework", "nwork",
      "ecolleague", "ncolleague",
      "esupervisor", "nsupervisor",
      "_pgnworkfaction", "_pgeworkfaction",
      "_pgncolleaguefaction", "_pgecolleaguefaction",
      "_pgnsupervisorfaction", "_pgesupervisorfaction",
      "_sameworkfaction", "_samecolleaguefaction", "_samesupervisorfaction"
    ))))
} %>%
  # Collapse the per-patron "_pg*" flags: 1 if the condition holds for any patron
  mutate(pgnworkfaction = ifelse(rowSums(select(., starts_with("_pgnworkfaction"))) > 0, 1, 0),
         pgeworkfaction = ifelse(rowSums(select(., starts_with("_pgeworkfaction"))) > 0, 1, 0),
         pgncolleaguefaction = ifelse(rowSums(select(., starts_with("_pgncolleaguefaction"))) > 0, 1, 0),
         pgecolleaguefaction = ifelse(rowSums(select(., starts_with("_pgecolleaguefaction"))) > 0, 1, 0),
         pgnsupervisorfaction = ifelse(rowSums(select(., starts_with("_pgnsupervisorfaction"))) > 0, 1, 0),
         pgesupervisorfaction = ifelse(rowSums(select(., starts_with("_pgesupervisorfaction"))) > 0, 1, 0)) %>%
  select(-starts_with("_pgnworkfaction"),
         -starts_with("_pgeworkfaction"),
         -starts_with("_pgncolleaguefaction"),
         -starts_with("_pgecolleaguefaction"),
         -starts_with("_pgnsupervisorfaction"),
         -starts_with("_pgesupervisorfaction")) %>%
  # Same for the per-patron "_same*" flags
  mutate(sameworkfaction = ifelse(rowSums(select(., starts_with("_sameworkfaction"))) > 0, 1, 0),
         samecolleaguefaction = ifelse(rowSums(select(., starts_with("_samecolleaguefaction"))) > 0, 1, 0),
         samesupervisorfaction = ifelse(rowSums(select(., starts_with("_samesupervisorfaction"))) > 0, 1, 0)) %>%
  select(-starts_with("_sameworkfaction"),
         -starts_with("_samecolleaguefaction"),
         -starts_with("_samesupervisorfaction")) %>%
  # Number of patrons each side is tied to (temporary helper columns)
  mutate(nwork_num = rowSums(select(., starts_with("nwork"))),
         ework_num = rowSums(select(., starts_with("ework"))),
         ncolleague_num = rowSums(select(., starts_with("ncolleague"))),
         ecolleague_num = rowSums(select(., starts_with("ecolleague"))),
         nsupervisor_num = rowSums(select(., starts_with("nsupervisor"))),
         esupervisor_num = rowSums(select(., starts_with("esupervisor")))) %>%
  mutate(nworkfactionno1 = ifelse(nwork_num == 0 & ework_num != 0, 1, 0),
         eworkfactionno1 = ifelse(nwork_num != 0 & ework_num == 0, 1, 0),
         bworkfactionno = ifelse(nwork_num == 0 & ework_num == 0, 1, 0),
         bworkfactionyes = ifelse(nwork_num != 0 & ework_num != 0, 1, 0),
         nworkfaction1 = ifelse(nwork_num == 1, 1, 0),
         nworkfaction2 = ifelse(nwork_num > 1, 1, 0),
         eworkfaction1 = ifelse(ework_num == 1, 1, 0),
         eworkfaction2 = ifelse(ework_num > 1, 1, 0)) %>%
  mutate(ncolleaguefactionno1 = ifelse(ncolleague_num == 0 & ecolleague_num != 0, 1, 0),
         ecolleaguefactionno1 = ifelse(ncolleague_num != 0 & ecolleague_num == 0, 1, 0),
         bcolleaguefactionno = ifelse(ncolleague_num == 0 & ecolleague_num == 0, 1, 0),
         bcolleaguefactionyes = ifelse(ncolleague_num != 0 & ecolleague_num != 0, 1, 0),
         ncolleaguefaction1 = ifelse(ncolleague_num == 1, 1, 0),
         ncolleaguefaction2 = ifelse(ncolleague_num > 1, 1, 0),
         ecolleaguefaction1 = ifelse(ecolleague_num == 1, 1, 0),
         ecolleaguefaction2 = ifelse(ecolleague_num > 1, 1, 0)) %>%
  mutate(nsupervisorfactionno1 = ifelse(nsupervisor_num == 0 & esupervisor_num != 0, 1, 0),
         esupervisorfactionno1 = ifelse(nsupervisor_num != 0 & esupervisor_num == 0, 1, 0),
         bsupervisorfactionno = ifelse(nsupervisor_num == 0 & esupervisor_num == 0, 1, 0),
         bsupervisorfactionyes = ifelse(nsupervisor_num != 0 & esupervisor_num != 0, 1, 0),
         nsupervisorfaction1 = ifelse(nsupervisor_num == 1, 1, 0),
         nsupervisorfaction2 = ifelse(nsupervisor_num > 1, 1, 0),
         esupervisorfaction1 = ifelse(esupervisor_num == 1, 1, 0),
         esupervisorfaction2 = ifelse(esupervisor_num > 1, 1, 0)) %>%
  select(-ends_with("_num"))


# Generate three factions - others are defined as None factions -----------
# Faction membership list. It does not depend on the loop year, so it is read,
# recoded and joined to the name lookup once instead of once per year.
faction.list.none <- read_csv("data/three_faction_list.csv") %>%
  mutate(year = as.character(year)) %>%
  # let's combine the Jiang faction into the Shanghai faction
  mutate(faction = recode(faction, `Jiang` = "Shanghai")) %>%
  left_join(Chinese.Pinyin.name)

# For each year 2000-2014: (1) flag which listed patrons are mentioned in the
# *total columns, (2) aggregate those flags to faction level, (3) derive the
# row-level summary dummies. Yearly tables are stacked in ascending year order;
# columns missing in a given year are filled with 0 at the very end.
onehot.three.faction.connection.none <- foreach(j = 2000:2014, .combine = bind_rows) %do% {
  # Load the mapping of Shanghai, Tuanpai, and Princelings
  three_faction <- faction.list.none %>%
    filter(year == j)

  # One regex per faction matching column names that END in a member's Pinyin name
  three_faction_wide <- three_faction %>%
    mutate(Pinyin_name = paste0(Pinyin_name, "$")) %>%
    group_by(faction, year) %>%
    summarise(faction_keywords = paste(Pinyin_name, collapse = "|")) %>%
    ungroup()

  # Rows of the current year; identical for every patron, so filtered once
  dyad.rows.j <- dyad.province.data %>%
    filter(year == j) %>%
    select(newsprov, eventprov, year, ntotal, etotal, ngntotal, egntotal)

  # Step 1: per-patron 0/1 dummies
  onehot.three.faction.connection <- foreach(i = seq_along(three_faction$Pinyin_name), .combine = cbind) %do% {
    patron.chinese <- as.character(three_faction$Chinese_name[i])
    patron.pinyin <- as.character(three_faction$Pinyin_name[i])
    patron.col <- function(prefix) paste0(prefix, patron.pinyin)

    dyad.rows.j %>%
      mutate(!!patron.col("efactionThreeNone") := ifelse(str_detect(etotal, patron.chinese), 1, 0),
             !!patron.col("nfactionThreeNone") := ifelse(str_detect(ntotal, patron.chinese), 1, 0),
             !!patron.col("egnfactionThreeNone") := ifelse(str_detect(egntotal, patron.chinese), 1, 0),
             !!patron.col("ngnfactionThreeNone") := ifelse(str_detect(ngntotal, patron.chinese), 1, 0)) %>%
      select(all_of(patron.col(c("efactionThreeNone", "nfactionThreeNone",
                                 "egnfactionThreeNone", "ngnfactionThreeNone"))))
  }

  # Step 2: per-faction dummies (1 if any member of the faction is flagged)
  three.faction.dummy.result <- foreach(i = seq_along(three_faction_wide$faction), .combine = cbind) %do% {
    faction.name <- as.character(three_faction_wide$faction[i])
    faction.regex <- three_faction_wide$faction_keywords[i]
    faction.col <- function(prefix) paste0(prefix, faction.name)

    onehot.three.faction.connection %>%
      # every select(., ...) below sees only the per-patron columns piped in
      mutate(!!faction.col("nThreeNone") := ifelse(rowSums(select(., starts_with("nfactionThreeNone") & matches(faction.regex))) > 0, 1, 0),
             !!faction.col("eThreeNone") := ifelse(rowSums(select(., starts_with("efactionThreeNone") & matches(faction.regex))) > 0, 1, 0),
             !!faction.col("ngnThreeNone") := ifelse(rowSums(select(., starts_with("ngnfactionThreeNone") & matches(faction.regex))) > 0, 1, 0),
             !!faction.col("egnThreeNone") := ifelse(rowSums(select(., starts_with("egnfactionThreeNone") & matches(faction.regex))) > 0, 1, 0)) %>%
      mutate(!!faction.col("_pgnThreefactionNone") := ifelse(.data[[faction.col("nThreeNone")]] == 1 & .data[[faction.col("ngnThreeNone")]] == 1, 1, 0),
             !!faction.col("_pgeThreefactionNone") := ifelse(.data[[faction.col("eThreeNone")]] == 1 & .data[[faction.col("egnThreeNone")]] == 1, 1, 0),
             !!faction.col("_sameThreefactionNone") := .data[[faction.col("nThreeNone")]] == 1 & .data[[faction.col("eThreeNone")]] == 1) %>%
      select(all_of(faction.col(c("eThreeNone", "nThreeNone", "egnThreeNone", "ngnThreeNone",
                                  "_pgnThreefactionNone", "_pgeThreefactionNone",
                                  "_sameThreefactionNone"))))
  }

  # Step 3: row-level summaries across factions
  onehot.three.faction.connection <- onehot.three.faction.connection %>%
    cbind(three.faction.dummy.result) %>%
    mutate(pgnThreefactionNone = ifelse(rowSums(select(., starts_with("_pgnThreefactionNone"))) > 0, 1, 0),
           pgeThreefactionNone = ifelse(rowSums(select(., starts_with("_pgeThreefactionNone"))) > 0, 1, 0)) %>%
    select(-starts_with("_pgnThreefactionNone"),
           -starts_with("_pgeThreefactionNone")) %>%
    mutate(sameThreefactionNone = ifelse(rowSums(select(., starts_with("_sameThreefactionNone"))) > 0, 1, 0)) %>%
    select(-starts_with("_sameThreefactionNone")) %>%
    # number of factions each side is tied to (temporary helper columns)
    mutate(nThreeNone_num = rowSums(select(., starts_with("nThreeNone"))),
           eThreeNone_num = rowSums(select(., starts_with("eThreeNone")))) %>%
    mutate(nThreefactionNoneno1 = ifelse(nThreeNone_num == 0 & eThreeNone_num != 0, 1, 0),
           eThreefactionNoneno1 = ifelse(nThreeNone_num != 0 & eThreeNone_num == 0, 1, 0),
           bThreefactionNoneno = ifelse(nThreeNone_num == 0 & eThreeNone_num == 0, 1, 0),
           bThreefactionNoneyes = ifelse(nThreeNone_num != 0 & eThreeNone_num != 0, 1, 0),
           nThreefactionNone1 = ifelse(nThreeNone_num == 1, 1, 0),
           nThreefactionNone2 = ifelse(nThreeNone_num > 1, 1, 0),
           eThreefactionNone1 = ifelse(eThreeNone_num == 1, 1, 0),
           eThreefactionNone2 = ifelse(eThreeNone_num > 1, 1, 0)) %>%
    select(-ends_with("_num"))

  onehot.three.faction.connection
} %>%
  # patrons/factions absent in a year produce NA after bind_rows(); treat as 0
  mutate_all(replace_na, replace = 0)

# Generate three factions - each patron outside them is treated as its own single-member faction -----------
# Faction membership list in which every "nofaction" patron is relabelled with
# his own Pinyin name. It does not depend on the loop year, so it is read,
# recoded and joined to the name lookup once instead of once per year.
faction.list.single <- read_csv("data/three_faction_list.csv") %>%
  mutate(year = as.character(year)) %>%
  # let's combine the Jiang faction into the Shanghai faction
  mutate(faction = recode(faction, `Jiang` = "Shanghai")) %>%
  mutate(faction_with_nofaction_as_faction = recode(faction, `nofaction` = Pinyin_name)) %>%
  left_join(Chinese.Pinyin.name)

# For each year 2000-2014: (1) flag which listed patrons are mentioned in the
# *total columns, (2) aggregate those flags to faction level, (3) derive the
# row-level summary dummies. Yearly tables are stacked in ascending year order;
# columns missing in a given year are filled with 0 at the very end.
onehot.three.faction.connection.single <- foreach(j = 2000:2014, .combine = bind_rows) %do% {
  # Load the mapping of Shanghai, Tuanpai, and Princelings
  three_faction <- faction.list.single %>%
    filter(year == j)

  # One regex per faction matching column names that END in a member's Pinyin name
  three_faction_wide <- three_faction %>%
    mutate(Pinyin_name = paste0(Pinyin_name, "$")) %>%
    group_by(faction_with_nofaction_as_faction, year) %>%
    summarise(faction_keywords = paste(Pinyin_name, collapse = "|")) %>%
    ungroup()

  # Rows of the current year; identical for every patron, so filtered once
  dyad.rows.j <- dyad.province.data %>%
    filter(year == j) %>%
    select(newsprov, eventprov, year, ntotal, etotal, ngntotal, egntotal)

  # Step 1: per-patron 0/1 dummies
  onehot.three.faction.connection <- foreach(i = seq_along(three_faction$Pinyin_name), .combine = cbind) %do% {
    patron.chinese <- as.character(three_faction$Chinese_name[i])
    patron.pinyin <- as.character(three_faction$Pinyin_name[i])
    patron.col <- function(prefix) paste0(prefix, patron.pinyin)

    dyad.rows.j %>%
      mutate(!!patron.col("efactionThreeSingle") := ifelse(str_detect(etotal, patron.chinese), 1, 0),
             !!patron.col("nfactionThreeSingle") := ifelse(str_detect(ntotal, patron.chinese), 1, 0),
             !!patron.col("egnfactionThreeSingle") := ifelse(str_detect(egntotal, patron.chinese), 1, 0),
             !!patron.col("ngnfactionThreeSingle") := ifelse(str_detect(ngntotal, patron.chinese), 1, 0)) %>%
      select(all_of(patron.col(c("efactionThreeSingle", "nfactionThreeSingle",
                                 "egnfactionThreeSingle", "ngnfactionThreeSingle"))))
  }

  # Step 2: per-faction dummies (1 if any member of the faction is flagged)
  three.faction.dummy.result <- foreach(i = seq_along(three_faction_wide$faction_with_nofaction_as_faction), .combine = cbind) %do% {
    faction.name <- as.character(three_faction_wide$faction_with_nofaction_as_faction[i])
    faction.regex <- three_faction_wide$faction_keywords[i]
    faction.col <- function(prefix) paste0(prefix, faction.name)

    onehot.three.faction.connection %>%
      # every select(., ...) below sees only the per-patron columns piped in
      mutate(!!faction.col("nThreeSingle") := ifelse(rowSums(select(., starts_with("nfactionThreeSingle") & matches(faction.regex))) > 0, 1, 0),
             !!faction.col("eThreeSingle") := ifelse(rowSums(select(., starts_with("efactionThreeSingle") & matches(faction.regex))) > 0, 1, 0),
             !!faction.col("ngnThreeSingle") := ifelse(rowSums(select(., starts_with("ngnfactionThreeSingle") & matches(faction.regex))) > 0, 1, 0),
             !!faction.col("egnThreeSingle") := ifelse(rowSums(select(., starts_with("egnfactionThreeSingle") & matches(faction.regex))) > 0, 1, 0)) %>%
      mutate(!!faction.col("_pgnThreefactionSingle") := ifelse(.data[[faction.col("nThreeSingle")]] == 1 & .data[[faction.col("ngnThreeSingle")]] == 1, 1, 0),
             !!faction.col("_pgeThreefactionSingle") := ifelse(.data[[faction.col("eThreeSingle")]] == 1 & .data[[faction.col("egnThreeSingle")]] == 1, 1, 0),
             !!faction.col("_sameThreefactionSingle") := .data[[faction.col("nThreeSingle")]] == 1 & .data[[faction.col("eThreeSingle")]] == 1) %>%
      select(all_of(faction.col(c("eThreeSingle", "nThreeSingle", "egnThreeSingle", "ngnThreeSingle",
                                  "_pgnThreefactionSingle", "_pgeThreefactionSingle",
                                  "_sameThreefactionSingle"))))
  }

  # Step 3: row-level summaries across factions
  onehot.three.faction.connection <- onehot.three.faction.connection %>%
    cbind(three.faction.dummy.result) %>%
    mutate(pgnThreefactionSingle = ifelse(rowSums(select(., starts_with("_pgnThreefactionSingle"))) > 0, 1, 0),
           pgeThreefactionSingle = ifelse(rowSums(select(., starts_with("_pgeThreefactionSingle"))) > 0, 1, 0)) %>%
    select(-starts_with("_pgnThreefactionSingle"),
           -starts_with("_pgeThreefactionSingle")) %>%
    mutate(sameThreefactionSingle = ifelse(rowSums(select(., starts_with("_sameThreefactionSingle"))) > 0, 1, 0)) %>%
    select(-starts_with("_sameThreefactionSingle")) %>%
    # number of factions each side is tied to (temporary helper columns)
    mutate(nThreeSingle_num = rowSums(select(., starts_with("nThreeSingle"))),
           eThreeSingle_num = rowSums(select(., starts_with("eThreeSingle")))) %>%
    mutate(nThreefactionSingleno1 = ifelse(nThreeSingle_num == 0 & eThreeSingle_num != 0, 1, 0),
           eThreefactionSingleno1 = ifelse(nThreeSingle_num != 0 & eThreeSingle_num == 0, 1, 0),
           bThreefactionSingleno = ifelse(nThreeSingle_num == 0 & eThreeSingle_num == 0, 1, 0),
           bThreefactionSingleyes = ifelse(nThreeSingle_num != 0 & eThreeSingle_num != 0, 1, 0),
           nThreefactionSingle1 = ifelse(nThreeSingle_num == 1, 1, 0),
           nThreefactionSingle2 = ifelse(nThreeSingle_num > 1, 1, 0),
           eThreefactionSingle1 = ifelse(eThreeSingle_num == 1, 1, 0),
           eThreefactionSingle2 = ifelse(eThreeSingle_num > 1, 1, 0)) %>%
    select(-ends_with("_num"))

  onehot.three.faction.connection
} %>%
  # patrons/factions absent in a year produce NA after bind_rows(); treat as 0
  mutate_all(replace_na, replace = 0)

# Combine the tables generated above with dyad.province.data
# cbind() pairs rows purely by position. The two three-faction tables were
# built by stacking the years 2000..2014 in ascending order, so they only line
# up with dyad.province.data if that table is itself ordered by year and every
# table has the same number of rows. Check this instead of silently attaching
# dummies to the wrong dyads.
if (!isTRUE(!is.unsorted(as.integer(as.character(dyad.province.data$year))))) {
  stop("dyad.province.data must be sorted by year (no missing years) before the faction dummies are attached")
}
if (nrow(onehot.workplace.connection) != nrow(dyad.province.data) ||
    nrow(onehot.three.faction.connection.none) != nrow(dyad.province.data) ||
    nrow(onehot.three.faction.connection.single) != nrow(dyad.province.data)) {
  stop("generated connection tables do not have the same number of rows as dyad.province.data")
}

dyad.province.data <- dyad.province.data %>%
  cbind(onehot.workplace.connection) %>%
  cbind(onehot.three.faction.connection.none) %>%
  cbind(onehot.three.faction.connection.single)


# Load Official Rank Data -------------------------------------------------
# Rank table; every power table below right-joins onto it.
official.rank <- read_csv(file = "data/official_rank.csv")

# Load Network Power Data -------------------------------------------------
# Only the patron name, the year and the network-power measure are kept.
patron.connection.power <- select(
  read_csv(file = "data/patron_network_power.csv"),
  Pinyin.patron.name, year, patron_connection_power
)


# Generate Patron Power ---------------------------------------------------
# Relative patron power: each patron's yearly news count as a share of the
# yearly total over all patrons, reshaped to one row per year and one column
# per patron.
national.cadres.news.count <- read_csv("data/patron_power_by_year.csv") %>%
  # right_join() has no `fill` argument: it was silently swallowed by `...` in
  # older dplyr and is rejected by newer releases. Missing counts are set to 0
  # explicitly in the next step instead.
  right_join(official.rank) %>%
  mutate(news_count = ifelse(is.na(news_count), 0, news_count)) %>%
  # a patron without an official rank in a given year gets zero power
  mutate(patron_power = ifelse(is.na(official_rank), 0, news_count)) %>%
  group_by(year) %>%
  mutate(patron_power_all = sum(patron_power)) %>%
  mutate(relative_patron_power = patron_power / patron_power_all) %>%
  ungroup() %>%
  select(Pinyin_name, year, relative_patron_power) %>%
  spread(Pinyin_name, relative_patron_power, fill = 0)


# Generate Title Patron Power ---------------------------------------------
# Same relative-power measure as above, but based on the title-level news
# counts; columns are suffixed with "_title".
national.cadres.title.news.count <- read_csv("data/patron_title_power_by_year.csv") %>%
  # right_join() has no `fill` argument (ignored by older dplyr, rejected by
  # newer releases); missing counts are zero-filled in the next step.
  right_join(official.rank) %>%
  mutate(news_count = ifelse(is.na(news_count), 0, news_count)) %>%
  # a patron without an official rank in a given year gets zero power
  mutate(patron_power = ifelse(is.na(official_rank), 0, news_count)) %>%
  group_by(year) %>%
  mutate(patron_power_all = sum(patron_power)) %>%
  mutate(relative_patron_power = patron_power / patron_power_all) %>%
  ungroup() %>%
  select(Pinyin_name, year, relative_patron_power) %>%
  spread(Pinyin_name, relative_patron_power, fill = 0)

# Suffix this table's own patron columns (column 1 is `year`), so the labels
# cannot drift from the data if another table has a different column set.
names(national.cadres.title.news.count)[-1] <- paste(names(national.cadres.title.news.count)[-1], "title", sep = "_")

# Generate Absolute Patron Power ------------------------------------------
# Absolute patron power: the raw yearly news count per patron (no
# normalisation), one row per year and one "<patron>_absolute" column each.
national.cadres.absolute.power <- read_csv("data/patron_power_by_year.csv") %>%
  # right_join() has no `fill` argument (ignored by older dplyr, rejected by
  # newer releases); missing counts are zero-filled in the next step.
  right_join(official.rank) %>%
  mutate(news_count = ifelse(is.na(news_count), 0, news_count)) %>%
  # a patron without an official rank in a given year gets zero power
  mutate(patron_power = ifelse(is.na(official_rank), 0, news_count)) %>%
  select(Pinyin_name, year, patron_power) %>%
  spread(Pinyin_name, patron_power, fill = 0)

# Suffix this table's own patron columns (column 1 is `year`)
names(national.cadres.absolute.power)[-1] <- paste(names(national.cadres.absolute.power)[-1], "absolute", sep = "_")


# Rule out President and Premier's Power ----------------------------------
# Relative patron power after zeroing out the incumbent president and premier,
# i.e. shares are computed among the remaining patrons only. Columns are
# suffixed with "_NoPP".
national.cadres.NoPP.power <- read_csv("data/patron_power_by_year.csv") %>%
  # right_join() has no `fill` argument (ignored by older dplyr, rejected by
  # newer releases); missing counts are zero-filled in the next step.
  right_join(official.rank) %>%
  mutate(news_count = ifelse(is.na(news_count), 0, news_count)) %>%
  mutate(patron_power = ifelse(is.na(official_rank), 0, news_count)) %>%
  # Here we set the power of incumbent president and premier to zero
  mutate(patron_power = ifelse(Pinyin_name %in% c("Jiangzemin", "Zhurongji") & year >= 2000 & year <= 2002, 0,
                               ifelse(Pinyin_name %in% c("Hujintao", "Wenjiabao") & year >= 2003 & year <= 2012, 0,
                                      ifelse(Pinyin_name %in% c("Xijinping", "Likeqiang") & year >= 2013 & year <= 2014, 0, patron_power)))) %>%
  group_by(year) %>%
  mutate(patron_power_all = sum(patron_power)) %>%
  mutate(patron_power_NoPP = patron_power / patron_power_all) %>%
  ungroup() %>%
  select(Pinyin_name, year, patron_power_NoPP) %>%
  spread(Pinyin_name, patron_power_NoPP, fill = 0)

# Suffix this table's own patron columns (column 1 is `year`)
names(national.cadres.NoPP.power)[-1] <- paste(names(national.cadres.NoPP.power)[-1], "NoPP", sep = "_")


# Consider only President and Premier have power --------------------------
# Relative patron power when only the incumbent president and premier are
# allowed to carry power: everybody else is set to zero before the yearly
# shares are computed. Columns are suffixed with "_PP".
national.cadres.PP.power <- read_csv("data/patron_power_by_year.csv") %>%
  # right_join() has no `fill` argument (ignored by older dplyr, rejected by
  # newer releases); missing counts are zero-filled in the next step.
  right_join(official.rank) %>%
  mutate(news_count = ifelse(is.na(news_count), 0, news_count)) %>%
  mutate(patron_power = ifelse(is.na(official_rank), 0, news_count)) %>%
  # `!` binds tighter than `&`, so each test reads
  # (not one of the two incumbents) & (year inside that term)
  mutate(patron_power = ifelse(!Pinyin_name %in% c("Jiangzemin", "Zhurongji") & year >= 2000 & year <= 2002, 0,
                               ifelse(!Pinyin_name %in% c("Hujintao", "Wenjiabao") & year >= 2003 & year <= 2012, 0,
                                      ifelse(!Pinyin_name %in% c("Xijinping", "Likeqiang") & year >= 2013 & year <= 2014, 0, patron_power)))) %>%
  group_by(year) %>%
  mutate(patron_power_all = sum(patron_power)) %>%
  mutate(patron_power_PP = patron_power / patron_power_all) %>%
  ungroup() %>%
  select(Pinyin_name, year, patron_power_PP) %>%
  spread(Pinyin_name, patron_power_PP, fill = 0)

# Suffix this table's own patron columns (column 1 is `year`)
names(national.cadres.PP.power)[-1] <- paste(names(national.cadres.PP.power)[-1], "PP", sep = "_")

# Generate Power Defined by Official Rank ---------------------------------
# Power defined by official rank: the `official_rank` value per patron and
# year, one row per year and one "<patron>_rank" column each (0 where missing).
# The news-count based power shares that used to be computed here were never
# selected into the result and have been dropped; the join itself is kept so
# the row set is unchanged.
national.cadres.official.rank <- read_csv("data/patron_power_by_year.csv") %>%
  # right_join() has no `fill` argument (ignored by older dplyr, rejected by
  # newer releases).
  right_join(official.rank) %>%
  select(Pinyin_name, year, official_rank) %>%
  spread(Pinyin_name, official_rank, fill = 0)

# Suffix this table's own patron columns (column 1 is `year`)
names(national.cadres.official.rank)[-1] <- paste(names(national.cadres.official.rank)[-1], "rank", sep = "_")

# Generate Power Defined by Connection ------------------------------------
# Power defined by network connections: `patron_connection_power` per patron
# and year, one row per year and one "<patron>_network" column each.
national.cadres.connection.power <- read_csv("data/patron_power_by_year.csv") %>%
  # right_join() has no `fill` argument (ignored by older dplyr, rejected by
  # newer releases), so it is omitted from both joins.
  right_join(patron.connection.power, by = c("Pinyin_name" = "Pinyin.patron.name", "year" = "year")) %>%
  right_join(official.rank) %>%
  # news_count is not part of the result, so it is no longer zero-filled here
  select(Pinyin_name, year, patron_connection_power) %>%
  # the joins can repeat a patron-year; spread() needs unique keys
  distinct() %>%
  spread(Pinyin_name, patron_connection_power, fill = 0)

names(national.cadres.connection.power)[-1] <- paste(names(national.cadres.connection.power)[-1], "network", sep = "_")

# Generate three factions power (consider no faction as single patron)
# The "Jiang" faction is folded into "Shanghai". Patrons labelled "nofaction"
# are each treated as their own one-member faction, identified by their
# Pinyin name.
three.faction.single <- read_csv("data/three_faction_list.csv") %>%
  mutate(faction = recode(faction, `Jiang` = "Shanghai")) %>%
  mutate(faction_with_nofaction_as_faction = recode(faction, `nofaction` = Pinyin_name))

# Yearly share of total news-count power held by each faction, reshaped to one
# row per year with one column per faction.
national.cadres.three.factions.single <- read_csv("data/patron_power_by_year.csv") %>%
  # NOTE: `fill` is not an argument of right_join(); recent dplyr versions
  # reject unknown arguments passed through `...`, so it is dropped here.
  right_join(three.faction.single) %>%
  mutate(patron_power = ifelse(is.na(news_count), 0, news_count)) %>%
  # total power per faction and year
  group_by(year, faction_with_nofaction_as_faction) %>%
  summarise(three_faction_power = sum(patron_power)) %>%
  ungroup() %>%
  # normalise to a within-year share
  group_by(year) %>%
  mutate(three_faction_power_all = sum(three_faction_power)) %>%
  mutate(relative_three_faction_power = three_faction_power / three_faction_power_all) %>%
  ungroup() %>%
  select(faction_with_nofaction_as_faction, year, relative_three_faction_power) %>%
  spread(faction_with_nofaction_as_faction, relative_three_faction_power, fill = 0)

# Suffix the faction columns (everything except `year`) with "_3_faction_single".
names(national.cadres.three.factions.single)[-1] <- paste(names(national.cadres.three.factions.single)[-1], "3_faction_single", sep = "_")

# Generate three factions power (consider no faction as non patron)
three.faction.none <- read_csv("data/three_faction_list.csv") %>%
  # let's combine the Jiang faction into the Shanghai faction
  mutate(faction = recode(faction, `Jiang` = "Shanghai"))

# Yearly share of total news-count power held by each faction, reshaped to one
# row per year with one column per faction.
national.cadres.three.factions.none <- read_csv("data/patron_power_by_year.csv") %>%
  # NOTE: `fill` is not an argument of right_join(); recent dplyr versions
  # reject unknown arguments passed through `...`, so it is dropped here.
  right_join(three.faction.none) %>%
  mutate(patron_power = ifelse(is.na(news_count), 0, news_count)) %>%
  # here all "nofaction" patrons are pooled into one "nofaction" group
  # (unlike the "single" variant, which keeps each of them separate)
  group_by(year, faction) %>%
  summarise(three_faction_power = sum(patron_power)) %>%
  ungroup() %>%
  # normalise to a within-year share
  group_by(year) %>%
  mutate(three_faction_power_all = sum(three_faction_power)) %>%
  mutate(relative_three_faction_power = three_faction_power / three_faction_power_all) %>%
  ungroup() %>%
  select(faction, year, relative_three_faction_power) %>%
  spread(faction, relative_three_faction_power, fill = 0)

# Suffix the faction columns (everything except `year`) with "_3_faction_none".
names(national.cadres.three.factions.none)[-1] <- paste(names(national.cadres.three.factions.none)[-1], "3_faction_none", sep = "_")

# Combine power defined by different definitions --------------------------
# Fold every yearly power table onto the news-count table, left to right.
# Each step is a natural left join on the columns the two tables share.
national.cadres.final.data <- Reduce(
  left_join,
  list(
    national.cadres.news.count,
    national.cadres.title.news.count,
    national.cadres.official.rank,
    national.cadres.absolute.power,
    national.cadres.NoPP.power,
    national.cadres.PP.power,
    national.cadres.connection.power,
    national.cadres.three.factions.single,
    national.cadres.three.factions.none
  )
)

# Accumulator columns filled in by the loops below; all of them start at 0.
# The order here is the order in which the columns are added to the data.
accumulator.columns <- c(
  # influence summed over patrons, by power definition and tie type
  "eClanInfluence", "nClanInfluence",
  "eTitleClanInfluence", "nTitleClanInfluence",
  "eClanInfluenceNoPP", "nClanInfluenceNoPP",
  "eClanInfluencePP", "nClanInfluencePP",
  "eClanInfluenceAbsolute", "nClanInfluenceAbsolute",
  "eworkClanInfluence", "nworkClanInfluence",
  "ecolleagueClanInfluence", "ncolleagueClanInfluence",
  "esupervisorClanInfluence", "nsupervisorClanInfluence",
  "egnClanInfluence", "ngnClanInfluence",
  # largest single-patron influence and the patron it belongs to
  "eClanInfluence_max", "nClanInfluence_max",
  "egnClanInfluence_max", "ngnClanInfluence_max",
  "e_strongest_faction", "n_strongest_faction",
  "egn_strongest_faction", "ngn_strongest_faction",
  # three-faction influence, "single" and "none" variants
  "eThreeClanInfluenceSingle", "nThreeClanInfluenceSingle",
  "egnThreeClanInfluenceSingle", "ngnThreeClanInfluenceSingle",
  "eThreeClanInfluenceNone", "nThreeClanInfluenceNone",
  "egnThreeClanInfluenceNone", "ngnThreeClanInfluenceNone",
  # official-rank based measures
  "eRank", "nRank", "egnRank", "ngnRank",
  "eRank_high", "nRank_high", "egnRank_high", "ngnRank_high",
  "eRank_total", "nRank_total", "egnRank_total", "ngnRank_total",
  # connection-network based measures
  "eNetwork", "nNetwork",
  "eNetwork_high", "nNetwork_high",
  "eNetwork_total", "nNetwork_total",
  # number of ties
  "eTies", "nTies", "egnTies", "ngnTies"
)

# Attach the yearly patron power measures to every dyad row (keeping all dyad
# rows) and add the zero-initialised accumulator columns.
final.stata.data <- dyad.province.data %>%
  merge(national.cadres.final.data, all.x = TRUE) %>%
  mutate(!!!setNames(rep(list(0), length(accumulator.columns)), accumulator.columns)) %>%
  as.data.frame()

# Accumulate patron-weighted measures over every national patron.
# For each patron the tie indicator columns ("<prefix><name>", e.g. "e<name>",
# "ngn<name>") are multiplied by that patron's yearly power columns ("<name>"
# for news-count power, "<name>_<suffix>" for the other definitions) and the
# products are added to the accumulator columns of final.stata.data.
for (name in Chinese.Pinyin.name$Pinyin_name) {
  # Column-name builders for the current patron.
  tie.col <- function(prefix) paste0(prefix, name)
  power.col <- function(suffix) paste(name, suffix, sep = "_")
  # Value stored in *_strongest_faction when this patron becomes the strongest.
  current.patron.code <- Chinese.Pinyin.name[which(Chinese.Pinyin.name$Pinyin_name == name), "patron_code"]

  # This patron's contribution to each measure, one value per dyad row.
  calculate.data <- final.stata.data %>%
    select(contains(name)) %>%
    mutate(
      # tie x relative news-count power
      eClanInfluence = .data[[tie.col("e")]] * .data[[name]],
      nClanInfluence = .data[[tie.col("n")]] * .data[[name]],
      # tie x alternative power definitions
      eTitleClanInfluence = .data[[tie.col("e")]] * .data[[power.col("title")]],
      nTitleClanInfluence = .data[[tie.col("n")]] * .data[[power.col("title")]],
      eClanInfluenceNoPP = .data[[tie.col("e")]] * .data[[power.col("NoPP")]],
      nClanInfluenceNoPP = .data[[tie.col("n")]] * .data[[power.col("NoPP")]],
      eClanInfluencePP = .data[[tie.col("e")]] * .data[[power.col("PP")]],
      nClanInfluencePP = .data[[tie.col("n")]] * .data[[power.col("PP")]],
      eClanInfluenceAbsolute = .data[[tie.col("e")]] * .data[[power.col("absolute")]],
      nClanInfluenceAbsolute = .data[[tie.col("n")]] * .data[[power.col("absolute")]],
      # alternative tie definitions x relative news-count power
      eworkClanInfluence = .data[[tie.col("ework")]] * .data[[name]],
      nworkClanInfluence = .data[[tie.col("nwork")]] * .data[[name]],
      ecolleagueClanInfluence = .data[[tie.col("ecolleague")]] * .data[[name]],
      ncolleagueClanInfluence = .data[[tie.col("ncolleague")]] * .data[[name]],
      esupervisorClanInfluence = .data[[tie.col("esupervisor")]] * .data[[name]],
      nsupervisorClanInfluence = .data[[tie.col("nsupervisor")]] * .data[[name]],
      egnClanInfluence = .data[[tie.col("egn")]] * .data[[name]],
      ngnClanInfluence = .data[[tie.col("ngn")]] * .data[[name]],
      # tie x official rank
      eRank = .data[[tie.col("e")]] * .data[[power.col("rank")]],
      nRank = .data[[tie.col("n")]] * .data[[power.col("rank")]],
      egnRank = .data[[tie.col("egn")]] * .data[[power.col("rank")]],
      ngnRank = .data[[tie.col("ngn")]] * .data[[power.col("rank")]],
      # tie x connection-network power
      eNetwork = .data[[tie.col("e")]] * .data[[power.col("network")]],
      nNetwork = .data[[tie.col("n")]] * .data[[power.col("network")]],
      # the tie indicators themselves
      eTies = .data[[tie.col("e")]],
      nTies = .data[[tie.col("n")]],
      egnTies = .data[[tie.col("egn")]],
      ngnTies = .data[[tie.col("ngn")]]
    )

  final.stata.data <- final.stata.data %>%
    # generate the strongest faction name: switch to the current patron only
    # when its influence strictly exceeds the running maximum (which still
    # holds the value from the previous patrons at this point)
    mutate(e_strongest_faction = ifelse(eClanInfluence_max >= calculate.data$eClanInfluence, e_strongest_faction, current.patron.code),
           n_strongest_faction = ifelse(nClanInfluence_max >= calculate.data$nClanInfluence, n_strongest_faction, current.patron.code),
           egn_strongest_faction = ifelse(egnClanInfluence_max >= calculate.data$egnClanInfluence, egn_strongest_faction, current.patron.code),
           ngn_strongest_faction = ifelse(ngnClanInfluence_max >= calculate.data$ngnClanInfluence, ngn_strongest_faction, current.patron.code)) %>%
    # then update the running maximum itself
    mutate(eClanInfluence_max = ifelse(eClanInfluence_max >= calculate.data$eClanInfluence, eClanInfluence_max, calculate.data$eClanInfluence),
           nClanInfluence_max = ifelse(nClanInfluence_max >= calculate.data$nClanInfluence, nClanInfluence_max, calculate.data$nClanInfluence),
           egnClanInfluence_max = ifelse(egnClanInfluence_max >= calculate.data$egnClanInfluence, egnClanInfluence_max, calculate.data$egnClanInfluence),
           ngnClanInfluence_max = ifelse(ngnClanInfluence_max >= calculate.data$ngnClanInfluence, ngnClanInfluence_max, calculate.data$ngnClanInfluence)) %>%
    # running sums over patrons
    mutate(
      eClanInfluence = eClanInfluence + calculate.data$eClanInfluence,
      nClanInfluence = nClanInfluence + calculate.data$nClanInfluence,
      eTitleClanInfluence = eTitleClanInfluence + calculate.data$eTitleClanInfluence,
      nTitleClanInfluence = nTitleClanInfluence + calculate.data$nTitleClanInfluence,
      eClanInfluenceNoPP = eClanInfluenceNoPP + calculate.data$eClanInfluenceNoPP,
      nClanInfluenceNoPP = nClanInfluenceNoPP + calculate.data$nClanInfluenceNoPP,
      eClanInfluencePP = eClanInfluencePP + calculate.data$eClanInfluencePP,
      nClanInfluencePP = nClanInfluencePP + calculate.data$nClanInfluencePP,
      eClanInfluenceAbsolute = eClanInfluenceAbsolute + calculate.data$eClanInfluenceAbsolute,
      nClanInfluenceAbsolute = nClanInfluenceAbsolute + calculate.data$nClanInfluenceAbsolute,
      eworkClanInfluence = eworkClanInfluence + calculate.data$eworkClanInfluence,
      nworkClanInfluence = nworkClanInfluence + calculate.data$nworkClanInfluence,
      ecolleagueClanInfluence = ecolleagueClanInfluence + calculate.data$ecolleagueClanInfluence,
      ncolleagueClanInfluence = ncolleagueClanInfluence + calculate.data$ncolleagueClanInfluence,
      esupervisorClanInfluence = esupervisorClanInfluence + calculate.data$esupervisorClanInfluence,
      nsupervisorClanInfluence = nsupervisorClanInfluence + calculate.data$nsupervisorClanInfluence,
      egnClanInfluence = egnClanInfluence + calculate.data$egnClanInfluence,
      ngnClanInfluence = ngnClanInfluence + calculate.data$ngnClanInfluence,
      eTies = eTies + calculate.data$eTies,
      nTies = nTies + calculate.data$nTies,
      egnTies = egnTies + calculate.data$egnTies,
      ngnTies = ngnTies + calculate.data$ngnTies
    ) %>%
    # Rank: eRank/nRank/egnRank/ngnRank hold the CURRENT patron's value only
    # (overwritten on every iteration); *_high keeps the row-wise maximum and
    # *_total the sum over patrons.
    mutate(
      eRank = 1 * calculate.data$eRank,
      nRank = 1 * calculate.data$nRank,
      egnRank = 1 * calculate.data$egnRank,
      ngnRank = 1 * calculate.data$ngnRank,
      eRank_high = pmax(eRank_high, eRank),
      nRank_high = pmax(nRank_high, nRank),
      egnRank_high = pmax(egnRank_high, egnRank),
      ngnRank_high = pmax(ngnRank_high, ngnRank),
      eRank_total = eRank_total + eRank,
      nRank_total = nRank_total + nRank,
      egnRank_total = egnRank_total + egnRank,
      ngnRank_total = ngnRank_total + ngnRank
    ) %>%
    # Network: same pattern as rank. The running maximum for the news side is
    # written to nNetwork_high (it was previously written to a misspelled
    # column, which left nNetwork_high at 0).
    mutate(
      eNetwork = 1 * calculate.data$eNetwork,
      nNetwork = 1 * calculate.data$nNetwork,
      eNetwork_high = pmax(eNetwork_high, eNetwork),
      nNetwork_high = pmax(nNetwork_high, nNetwork),
      eNetwork_total = eNetwork_total + eNetwork,
      nNetwork_total = nNetwork_total + nNetwork
    ) %>%
    # hand a tibble to the following steps regardless of the input class
    as_tibble()
}

# Three-faction influence, "none" variant: all patrons without a faction are
# pooled into the single "nofaction" group. (The "single" variant is handled
# by the next loop.)
# For each faction the tie columns "<prefix>ThreeNone<faction>" are multiplied
# by the faction's yearly power share "<faction>_3_faction_none" and summed
# into the *ThreeClanInfluenceNone accumulators.
for (faction in c("nofaction", "Shanghai", "tuanpai", "princeling")) {
  # column names for the current faction
  power.none.col <- paste(faction, "3_faction_none", sep = "_")
  e.none.col <- paste0("eThreeNone", faction)
  n.none.col <- paste0("nThreeNone", faction)
  egn.none.col <- paste0("egnThreeNone", faction)
  ngn.none.col <- paste0("ngnThreeNone", faction)

  # this faction's contribution, one value per dyad row
  calculate.data <- final.stata.data %>%
    select(contains(faction)) %>%
    mutate(
      eThreeClanInfluenceNone = .data[[e.none.col]] * .data[[power.none.col]],
      nThreeClanInfluenceNone = .data[[n.none.col]] * .data[[power.none.col]],
      egnThreeClanInfluenceNone = .data[[egn.none.col]] * .data[[power.none.col]],
      ngnThreeClanInfluenceNone = .data[[ngn.none.col]] * .data[[power.none.col]]
    )

  # add it to the running totals
  final.stata.data <- final.stata.data %>%
    mutate(eThreeClanInfluenceNone = eThreeClanInfluenceNone + calculate.data$eThreeClanInfluenceNone) %>%
    mutate(nThreeClanInfluenceNone = nThreeClanInfluenceNone + calculate.data$nThreeClanInfluenceNone) %>%
    mutate(egnThreeClanInfluenceNone = egnThreeClanInfluenceNone + calculate.data$egnThreeClanInfluenceNone) %>%
    mutate(ngnThreeClanInfluenceNone = ngnThreeClanInfluenceNone + calculate.data$ngnThreeClanInfluenceNone)
}


# -------------------------------------------------------------------------
# Three-faction influence, "single" variant: the three factions plus four
# individual patrons, each of whom is treated as a faction of their own.
# For each of them the tie columns "<prefix>ThreeSingle<faction>" are
# multiplied by the yearly power share "<faction>_3_faction_single" and summed
# into the *ThreeClanInfluenceSingle accumulators.
for (faction in c("Shanghai", "tuanpai", "princeling", "Heguoqiang", "Luogan", "Weijianxing", "Wenjiabao")) {
  # column names for the current faction
  power.single.col <- paste(faction, "3_faction_single", sep = "_")
  e.single.col <- paste0("eThreeSingle", faction)
  n.single.col <- paste0("nThreeSingle", faction)
  egn.single.col <- paste0("egnThreeSingle", faction)
  ngn.single.col <- paste0("ngnThreeSingle", faction)

  # this faction's contribution, one value per dyad row
  calculate.data <- final.stata.data %>%
    select(contains(faction)) %>%
    mutate(
      eThreeClanInfluenceSingle = .data[[e.single.col]] * .data[[power.single.col]],
      nThreeClanInfluenceSingle = .data[[n.single.col]] * .data[[power.single.col]],
      egnThreeClanInfluenceSingle = .data[[egn.single.col]] * .data[[power.single.col]],
      ngnThreeClanInfluenceSingle = .data[[ngn.single.col]] * .data[[power.single.col]]
    )

  # add it to the running totals
  final.stata.data <- final.stata.data %>%
    mutate(eThreeClanInfluenceSingle = eThreeClanInfluenceSingle + calculate.data$eThreeClanInfluenceSingle) %>%
    mutate(nThreeClanInfluenceSingle = nThreeClanInfluenceSingle + calculate.data$nThreeClanInfluenceSingle) %>%
    mutate(egnThreeClanInfluenceSingle = egnThreeClanInfluenceSingle + calculate.data$egnThreeClanInfluenceSingle) %>%
    mutate(ngnThreeClanInfluenceSingle = ngnThreeClanInfluenceSingle + calculate.data$ngnThreeClanInfluenceSingle)
}

# Derived dyad-level variables --------------------------------------------
# Naming: the n* and e* columns are the two sides of each dyad; "diff" is
# n minus e, "relative" is the add-one ratio (n + 1) / (e + 1), and "gap" is
# the absolute difference.

# (n + 1) / (e + 1); the +1 keeps the ratio defined when e is 0.
add.one.ratio <- function(n.side, e.side) {
  (n.side + 1) / (e.side + 1)
}

# total / ties, with missing results (e.g. 0 / 0 when there are no ties)
# replaced by 0.
per.tie <- function(total, ties) {
  ratio <- total / ties
  ifelse(is.na(ratio), 0, ratio)
}

final.stata.data <- final.stata.data %>%
  # differences in accumulated influence
  mutate(
    diffClanInfluence = nClanInfluence - eClanInfluence,
    diffTitleClanInfluence = nTitleClanInfluence - eTitleClanInfluence,
    diffClanInfluenceNoPP = nClanInfluenceNoPP - eClanInfluenceNoPP,
    diffClanInfluencePP = nClanInfluencePP - eClanInfluencePP,
    # log-transform the absolute power measure
    nClanInfluenceLog = log(nClanInfluenceAbsolute + 1),
    eClanInfluenceLog = log(eClanInfluenceAbsolute + 1),
    diffworkClanInfluence = nworkClanInfluence - eworkClanInfluence,
    diffcolleagueClanInfluence = ncolleagueClanInfluence - ecolleagueClanInfluence,
    diffsupervisorClanInfluence = nsupervisorClanInfluence - esupervisorClanInfluence,
    diffgnClanInfluence = ngnClanInfluence - egnClanInfluence
  ) %>%
  # add-one ratios and absolute gaps
  mutate(
    relativeClanInfluence = add.one.ratio(nClanInfluence, eClanInfluence),
    relativeClanInfluenceNoPP = add.one.ratio(nClanInfluenceNoPP, eClanInfluenceNoPP),
    relativeClanInfluencePP = add.one.ratio(nClanInfluencePP, eClanInfluencePP),
    relativeworkClanInfluence = add.one.ratio(nworkClanInfluence, eworkClanInfluence),
    relativecolleagueClanInfluence = add.one.ratio(ncolleagueClanInfluence, ecolleagueClanInfluence),
    relativesupervisorClanInfluence = add.one.ratio(nsupervisorClanInfluence, esupervisorClanInfluence),
    gapInfluence = abs(diffClanInfluence),
    gapInfluenceNoPP = abs(diffClanInfluenceNoPP),
    gapInfluencePP = abs(diffClanInfluencePP)
  ) %>%
  # three factions influence
  mutate(
    diffThreeClanInfluenceSingle = nThreeClanInfluenceSingle - eThreeClanInfluenceSingle,
    diffgnThreeClanInfluenceSingle = ngnThreeClanInfluenceSingle - egnThreeClanInfluenceSingle,
    # relativeThreeClanInfluenceSingle = (nThreeClanInfluenceSingle + 1) / (eThreeClanInfluenceSingle + 1),
    gapThreeInfluenceSingle = abs(diffThreeClanInfluenceSingle),
    diffThreeClanInfluenceNone = nThreeClanInfluenceNone - eThreeClanInfluenceNone,
    diffgnThreeClanInfluenceNone = ngnThreeClanInfluenceNone - egnThreeClanInfluenceNone,
    relativeThreeClanInfluenceNone = add.one.ratio(nThreeClanInfluenceNone, eThreeClanInfluenceNone),
    gapThreeInfluenceNone = abs(diffThreeClanInfluenceNone)
  ) %>%
  # 0/1 indicators for the sign of the influence difference
  mutate(
    diffClanInfluenceDummyMore = as.numeric(diffClanInfluence > 0),
    diffClanInfluenceDummyLess = as.numeric(diffClanInfluence < 0),
    diffClanInfluenceDummyENS = as.numeric(diffClanInfluence == 0 & samefaction == 0)
  ) %>%
  # average influence per tie
  mutate(
    eAverageInfluence = per.tie(eClanInfluence, eTies),
    nAverageInfluence = per.tie(nClanInfluence, nTies),
    egnAverageInfluence = per.tie(egnClanInfluence, egnTies),
    ngnAverageInfluence = per.tie(ngnClanInfluence, ngnTies),
    diffAverageInfluence = nAverageInfluence - eAverageInfluence
  ) %>%
  # timing relative to the 2002 / 2007 / 2012 / 2017 cycle, and the power of
  # the top two leaders in office in each period
  mutate(
    time_to_NPC = ifelse(
      year > 2012, 2017 - year,
      ifelse(
        year > 2007, 2012 - year,
        ifelse(year > 2002, 2007 - year, 2002 - year)
      )
    ),
    NPC_year = as.numeric(year == 2002 | year == 2007 | year == 2012),
    head_NPC_year = as.numeric(year == 2001 | year == 2006 | year == 2011),
    chairman_power = ifelse(year > 2012, Xijinping, ifelse(year > 2002, Hujintao, Jiangzemin)),
    premier_power = ifelse(year > 2012, Likeqiang, ifelse(year > 2002, Wenjiabao, Zhurongji))
  ) %>%
  # average rank / network power per tie and their differences
  mutate(
    eAverageRank = per.tie(eRank_total, eTies),
    nAverageRank = per.tie(nRank_total, nTies),
    eAverageNetwork = per.tie(eNetwork_total, eTies),
    nAverageNetwork = per.tie(nNetwork_total, nTies),
    egnAverageRank = per.tie(egnRank_total, egnTies),
    ngnAverageRank = per.tie(ngnRank_total, ngnTies),
    diffClanRank = nRank_total - eRank_total,
    diffNetwork = nNetwork_total - eNetwork_total,
    diffgnClanRank = ngnRank_total - egnRank_total
  ) %>%
  # 1 when both sides of the dyad have a non-empty faction string
  mutate(
    bfactionyes = as.numeric(ntotal != "" & etotal != ""),
    bgnfactionyes = as.numeric(ngntotal != "" & egntotal != "")
  )


# Combine the province-name news counts and normalize the report counts -----
province.name.news <- read_csv("data/event_news_province_name_news.csv")

final.stata.data.norm <- final.stata.data %>%
  left_join(province.name.news, by = c("eventprov", "newsprov", "year")) %>%
  # each report count divided by the matching province-name news count
  mutate(
    corruptionreport_norm = corruptionreport / province_name_news,
    partylinecorruption_norm = partylinecorruption / province_name_party_news,
    comlinecorruption_norm = comlinecorruption / province_name_commerical_news
  ) %>%
  # remove intermediate variables
  select(
    -contains("factionThree"),
    -ends_with("_rank"),
    -ends_with("_PP"),
    -ends_with("_NoPP"),
    -ends_with("_title"),
    -ends_with("_absolute"),
    -ends_with("_network")
  )

# Export the regression data set in Stata format.
write_dta(final.stata.data.norm, "data/final_stata_data_for_regression.dta")


